# Load libraries
library(DESeq2)
library(edgeR)
library(limma)
library(variancePartition)
library(foreach)
library(clusterProfiler)
library(org.Hs.eg.db)
library(ggh4x)
library(magrittr)
library(enrichplot)
library(patchwork)
library(fgsea)
library(doParallel)
library(tidyverse)

# Define main path
# main_path <- file.path("/home/grocamora/RytenLab-Research/18-DGE_limma_dream")# here::here()

main_path <- file.path("/home/jbrenton/ASAP_Hardy_post_align_files/Final_Dataset/Hardy_Limma_Analysis_Final_version_only")

# Load helper functions
# source(file.path(main_path, "R/hf_additional.R"))
source(file.path(main_path, "R/hf_graph_and_themes.R"))
# source(file.path(main_path, "R/hf_intron_categories.R"))
source(file.path(main_path, "R/hf_reports.R"))

# Set options
options(dplyr.summarise.inform = FALSE)
options(lifecycle_verbosity = "warning")
knitr::opts_chunk$set(echo = T, warning = F, message = F, 
                      out.width="100%", fig.align = "center", dpi = 150,
                      fig.width = 7.2, fig.height = 7.2)

1 Background

Methodoly and documentation: TBD

This report includes the gene enrichment analysis of the differentially expressed genes found in the previous report. We decided to focus on three different databases: GO, KEGG and REACTOME.

First, we need to load the results from the previous report and set the parameters that will dictate the results:

# Paths
# main_path <- "/home/grocamora/RytenLab-Research/18-DGE_limma_dream"
# results_path <- file.path(main_path, "results/Hardy_NewCovs")
results_path <- file.path(main_path, "results")

hardy_dge_results_path <- file.path(results_path, "dge_variables/")
deg_all_output_path <- file.path(hardy_dge_results_path, "PD_Collapsed_dge_all.rds")
gene_enrichment_results_path <- file.path(results_path, "gene_enrichment_variables/")
dir.create(gene_enrichment_results_path, showWarnings = F, recursive = T)

formula_res_merge_path <- file.path(gene_enrichment_results_path, "pd_collapsed_formula_GO.rds")
formula_res_merge_red_path <- file.path(gene_enrichment_results_path, "pd_collapsed_formula_GO_red.rds")
formula_res_KEGG_path <- file.path(gene_enrichment_results_path, "pd_collapsed_formula_KEGG.rds")
formula_res_REACTOME_path <- file.path(gene_enrichment_results_path, "pd_collapsed_formula_REACTOME.rds")

# Load data
deg_all_output <- readRDS(deg_all_output_path)

# Report parameters
count_threshold <- 2
go_reduce_threshold <- 0.5

We will employ clusterProfiler for the enrichment analysis. We also need to load the gene information - i.e. gene symbol, ENTREZID and ENSEMBL ID.

# Load gene info
uniKeys <- AnnotationDbi::keys(org.Hs.eg.db, keytype = "ENSEMBL")
cols <- c("SYMBOL", "ENTREZID", "ENSEMBL")
cross_comp_list <- AnnotationDbi::select(org.Hs.eg.db, keys = uniKeys, columns = cols, keytype = "ENSEMBL")

# Remove duplicate symbols
non_ducplicates_idx <- which(duplicated(cross_comp_list$SYMBOL) == F)
cross_comp_list <- cross_comp_list[non_ducplicates_idx, ]

From the results previously loaded, we generate the clusters of data. Two different grouping scenearios are going to be considered: collapsed by brain region and brain region specific.

# Generate the cluster data combined
 cluster_data <- deg_all_output %>%
  # dplyr::filter(method == "limma") %>%
  # dplyr::select(-method) %>%
  dplyr::mutate(test = stringr::str_replace_all(test, "-", " vs ")) %>% 
  dplyr::mutate(test = factor(test, levels = c("PD vs Control"))) %>%
  dplyr::left_join(cross_comp_list, by = c("gene_id" = "ENSEMBL"), relationship = "many-to-many") %>%
  dplyr::select(ENTREZID, SYMBOL, logFC, adj.P.Val, tissue, sig_reg, test) %>%
  dplyr::filter(!is.na(ENTREZID))

cluster_data_sig <- cluster_data %>%
  dplyr::filter(sig_reg != "Unchanged") %>%
  dplyr::distinct(ENTREZID, tissue, test, .keep_all = T)
universe_genes <- cluster_data$ENTREZID %>% unique()

Changelog

v1.3

  • Updated to R v4.3.0

  • The GO reduction is now consistent with the Wood dataset.

v1.2

  • Fixed covariate identification mistake.

v1.1

  • Modified plots to better represent the pathways.

  • Refactoring code to better handle the different scenarios.

v1.0

  • Initial report.

  • Added GO term reduction.

1.1 Helper functions

generatePlotDataframe <- function(formula_res){
  if("parent_term" %in% colnames(formula_res@compareClusterResult)){
    description_names <- formula_res@compareClusterResult %>% 
      dplyr::distinct(Description, parent_term) %>% 
      dplyr::group_by(parent_term) %>% 
      dplyr::count() %>%
      dplyr::mutate(mod_description = stringr::str_to_title(paste0(parent_term, " (", n, ")"))) %>%
      dplyr::mutate(mod_description = paste0(strwrap(mod_description, width = 50, prefix = "\n", initial = ""),
                                             collapse = "")) %>%
      dplyr::select(parent_term, n, mod_description)
    
    plot_df <- formula_res@compareClusterResult %>% 
      dplyr::group_by(test, tissue, sig_reg, parent_term) %>%
      dplyr::filter(p.adjust == min(p.adjust)) %>%
      dplyr::filter(parent_sim_score == max(parent_sim_score)) %>%
      dplyr::ungroup() %>%
      dplyr::left_join(description_names, by = "parent_term") %>%
      dplyr::mutate(Description = mod_description) %>%
      # tidyr::separate(GeneRatio, into = c("gr1", "gr2"), convert = T) %>%
      # dplyr::mutate(geneRatio = gr1/gr2) %>%
      dplyr::group_by(Description) %>% 
      dplyr::mutate(min.padj = min(p.adjust)) %>% 
      dplyr::ungroup() %>%
      dplyr::arrange(desc(n), test, tissue, p.adjust) %>%
      dplyr::mutate(Description = factor(Description, levels = unique(Description)),
                    sig_reg = factor(sig_reg, levels = c("Significantly Downregulated", "Significantly Upregulated")),
                    test = factor(test, levels = c("PD vs Control")),
                    tissue = factor(tissue, levels = c("ACG", "IPL", "MFG", "MTG")))
  }else{
    plot_df <- formula_res@compareClusterResult %>% 
      dplyr::rowwise() %>% 
      dplyr::mutate(Description = stringr::str_to_title(Description),
                    Description = paste0(strwrap(Description, width = 50, prefix = "\n", initial = ""), 
                                         collapse = "")) %>% 
      dplyr::group_by(Description) %>% 
      dplyr::mutate(n = n(),
                    min.padj = min(p.adjust)) %>% 
      dplyr::ungroup() %>%
      dplyr::arrange(desc(n), min.padj, test, tissue, p.adjust) %>%
      dplyr::mutate(Description = factor(Description, levels = unique(Description)),
                    sig_reg = factor(sig_reg, levels = c("Significantly Downregulated", "Significantly Upregulated")),
                    test = factor(test, levels = c("PD vs Control")),
                    tissue = factor(tissue, levels = c("ACG", "IPL", "MFG", "MTG")))
  }
}

drawPathwaysGO <- function(df_plot, x = "test", x_expansion = c(0.5, 0.5), drop_facet = T, drop_x = F){
  plot <- plot_df %>% 
    ggplot(aes(x = .data[[x]], y = Description, fill = -log10(p.adjust))) +
    geom_tile(color = "black", width = 1, show.legend = T) +
    geom_hline(yintercept = seq(0.5, length(plot_df$Description)), color="black", linewidth = 0.2) +
    geom_vline(xintercept = seq(0.5, length(plot_df[, x, T])+1), color="black", linewidth = 0.2) +
    scale_y_discrete(limits = rev, expand = expansion()) +
    scale_x_discrete(drop = drop_x, expand = expansion(add = x_expansion)) +
    scale_fill_viridis_c(name = "-log"[1][0]~"(FDR)", breaks = c(0, 2, 4, 6, 8, 10),
                         na.value = "white", option = "cividis") +
    coord_equal() +
    labs(x = "", y = "GO Term", fill = "hola") +
    custom_gg_theme +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
          strip.text.x = element_text(size = 9, face = "plain"),
          panel.background = element_rect(fill = "white"),
          panel.grid.minor = element_blank(),
          panel.grid.major.y = element_blank(),
          panel.grid.major.x = element_blank(),
          panel.grid = element_blank())
  
  # Facet the plots based on what they have on the X-axis
  if(x == "test"){
    plot <- plot +
      ggh4x::facet_nested(. ~ sig_reg, drop = drop_facet, 
                          labeller = labeller(sig_reg = c("Significantly Upregulated" = "Up-Reg.",
                                                          "Significantly Downregulated" = "Down-Reg.")))
  }else if(x == "tissue"){
    plot <- plot +
      ggh4x::facet_nested(. ~ sig_reg + test, drop = drop_facet, 
                          labeller = labeller(sig_reg = c("Significantly Upregulated" = "Up-Reg.",
                                                          "Significantly Downregulated" = "Down-Reg.")))
  }
  
  return(plot)
}

# New function to go with updated rrvgo - problem mainly with reduce sim function - both score parameter and parent_sim_score are changed
# scores is now a character vector choice of either "size" or "uniqueness" - uniqueness is default
# parent sim score is now divided into term uniqueness and term dissimilaity or something
updated_go_reduce<-function (pathway_df, orgdb = "org.Hs.eg.db", threshold = 0.7, 
                             scores = NULL, measure = "Wang") 
{
  if (!measure %in% c("Resnik", "Lin", "Rel", "Jiang", "Wang")) {
    stop("Chosen measure is not one of the recognised measures, c(\"Resnik\", \"Lin\", \"Rel\", \"Jiang\", \"Wang\").")
  }
  if (measure == "Wang") {
    computeIC <- FALSE
  }
  else {
    computeIC <- TRUE
  }
  ont <- pathway_df %>% .[["go_type"]] %>% unique()
  if (any(!ont %in% c("BP", "CC", "MF"))) {
    stop("Column go_type does not contain the recognised sub-ontologies, c(\"BP\", \"CC\", \"MF\")")
  }
  go_similarity <- setNames(object = vector(mode = "list", 
                                            length = length(ont)), nm = ont)
  for (i in 1:length(ont)) {
    print(stringr::str_c("Reducing sub-ontology: ", ont[i]))
    hsGO <- GOSemSim::godata(OrgDb = orgdb, ont = ont[i], 
                             computeIC = computeIC)
    terms <- pathway_df %>% dplyr::filter(.data$go_type == 
                                            ont[i]) %>% .[["go_id"]] %>% unique()
    sim <- GOSemSim::mgoSim(GO1 = terms, GO2 = terms, semData = hsGO, 
                            measure = measure, combine = NULL)
    go_similarity[[i]] <- rrvgo::reduceSimMatrix(simMatrix = sim, 
                                                 threshold = threshold, orgdb = orgdb) %>% 
      tibble::as_tibble() %>% dplyr::rename(parent_id = .data$parent, 
                                            parent_term = .data$parentTerm, parent_sim_score = .data$termUniqueness)
  }
  go_sim_df <- go_similarity %>% qdapTools::list_df2df(col1 = "go_type")
  pathway_go_sim_df <- pathway_df %>% 
    dplyr::inner_join(go_sim_df %>% 
                        dplyr::select(.data$go_type, go_id = .data$go, contains("parent")), 
                      by = c("go_type", "go_id")) %>% 
    dplyr::arrange(.data$go_type, .data$parent_id, -.data$parent_sim_score)
  
  return(pathway_go_sim_df)
}

2 GO enrichment analysis

Two different visualizations are presented: i) GO terms as extracted from enrichGO() and ii) GO terms collapsed by similarity from Regina H. Reynolds’ function go_reduce(), from her package rutils.

If several GO terms are combined into one, the GeneRatio and adjusted p-value shown in the plots correspond to the minimum adjusted p-value of the collapsed terms. The number in brackets next to the Y-axis labels represent the number of GO terms collapsed.

if(!file_test("-f", formula_res_merge_path, formula_res_merge_red_path)){
  formula_res_mf <- clusterProfiler::compareCluster(
    ENTREZID ~ sig_reg + tissue + test,
    data = cluster_data_sig, fun = enrichGO,
    OrgDb = org.Hs.eg.db, ont = "MF",
    universe = universe_genes,
    readable = T) %>%
    dplyr::mutate(go_type = "MF", go_id = ID)
  
  formula_res_bp <- clusterProfiler::compareCluster(
    ENTREZID ~ sig_reg + tissue + test,
    data = cluster_data_sig, fun = enrichGO,
    OrgDb = org.Hs.eg.db, ont = "BP",
    universe = universe_genes,
    readable = T) %>%
    dplyr::mutate(go_type = "BP", go_id = ID)
  
  formula_res_cc <- clusterProfiler::compareCluster(
    ENTREZID ~ sig_reg + tissue + test,
    data = cluster_data_sig, fun = enrichGO,
    OrgDb = org.Hs.eg.db, ont = "CC",
    universe = universe_genes,
    readable = T) %>%
    dplyr::mutate(go_type = "CC", go_id = ID)
  
  # Reduce the GO terms
  formula_res_mf_red <- formula_res_mf
  formula_res_bp_red <- formula_res_bp
  formula_res_cc_red <- formula_res_cc
  
  formula_res_mf_red@compareClusterResult %<>% updated_go_reduce(threshold = go_reduce_threshold)
  formula_res_bp_red@compareClusterResult %<>% updated_go_reduce(threshold = go_reduce_threshold)
  formula_res_cc_red@compareClusterResult %<>% updated_go_reduce(threshold = go_reduce_threshold)
  
  # Filter pathways
  
  formula_res_mf@compareClusterResult %<>% dplyr::filter(Count >= count_threshold)
  formula_res_bp@compareClusterResult %<>% dplyr::filter(Count >= count_threshold)
  formula_res_cc@compareClusterResult %<>% dplyr::filter(Count >= count_threshold)
  

  formula_res_mf_red@compareClusterResult %<>% dplyr::filter(Count >= count_threshold)
  formula_res_bp_red@compareClusterResult %<>% dplyr::filter(Count >= count_threshold)
  formula_res_cc_red@compareClusterResult %<>% dplyr::filter(Count >= count_threshold)
  
  # Merge ontologies
  formula_res_merge <- clusterProfiler::merge_result(list(mf = formula_res_mf,
                                                          bp = formula_res_bp,
                                                          cc = formula_res_cc))
  formula_res_merge_red <- clusterProfiler::merge_result(list(mf = formula_res_mf_red,
                                                              bp = formula_res_bp_red,
                                                              cc = formula_res_cc_red))
  
  # Rename ontologies
  names(formula_res_merge@compareClusterResult)[1] <- "Ontology"
  names(formula_res_merge_red@compareClusterResult)[1] <- "Ontology"
  
  # Save results
  formula_res_merge %>% saveRDS(formula_res_merge_path)
  formula_res_merge_red %>% saveRDS(formula_res_merge_red_path)
}else{
  formula_res_merge <- readRDS(formula_res_merge_path)
  formula_res_merge_red <- readRDS(formula_res_merge_red_path)
}

2.1 Collapsed brain regions

Both

Expand to see results for default GO terms.
formula_res_merge_both <- formula_res_merge %>% dplyr::filter(tissue == "Collapse")
plot_df <- generatePlotDataframe(formula_res_merge_both)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df, drop_facet = F)
}else{
  print("No pathways found in this scenario.")
}

plot_df$tissue<-"Collapse"
plot_df_collapse<-plot_df


Expand to see results for reduced GO terms.
formula_res_merge_red_both <- formula_res_merge_red %>% dplyr::filter(tissue == "Collapse")
plot_df <- generatePlotDataframe(formula_res_merge_red_both)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df, drop_facet = F)
}else{
  print("No pathways found in this scenario.")
}

plot_df$tissue<-"Collapse"
plot_df_collapse_red<-plot_df


Upregulated

Expand to see results for default GO terms.
formula_res_merge_up <- formula_res_merge %>% dplyr::filter(grepl("Upregulated", sig_reg), tissue == "Collapse")
plot_df <- generatePlotDataframe(formula_res_merge_up)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df)
}else{
  print("No pathways found in this scenario.")
}
## [1] "No pathways found in this scenario."


Expand to see results for reduced GO terms.
formula_res_merge_red_up <- formula_res_merge_red %>% dplyr::filter(grepl("Upregulated", sig_reg), tissue == "Collapse")
plot_df <- generatePlotDataframe(formula_res_merge_red_up)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df)
}else{
  print("No pathways found in this scenario.")
}
## [1] "No pathways found in this scenario."


Downregulated

Expand to see results for default GO terms.
formula_res_merge_down <- formula_res_merge %>% dplyr::filter(grepl("Downregulated", sig_reg), tissue == "Collapse")
plot_df <- generatePlotDataframe(formula_res_merge_down)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df)
}else{
  print("No pathways found in this scenario.")
}


Expand to see results for reduced GO terms.
formula_res_merge_red_down <- formula_res_merge_red %>% dplyr::filter(grepl("Downregulated", sig_reg), tissue == "Collapse")
plot_df <- generatePlotDataframe(formula_res_merge_red_down)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df)
}else{
  print("No pathways found in this scenario.")
}


2.2 By brain regions

Both

Expand to see results for default GO terms.
formula_res_merge_both <- formula_res_merge %>% dplyr::filter(tissue != "Collapse")
plot_df <- generatePlotDataframe(formula_res_merge_both)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df, x = "tissue", drop_facet = T)
}else{
  print("No pathways found in this scenario.")
}

plot_df_indv_bas<-plot_df


Expand to see results for reduced GO terms.
formula_res_merge_red_both <- formula_res_merge_red %>% dplyr::filter(tissue != "Collapse")
plot_df <- generatePlotDataframe(formula_res_merge_red_both)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df, x = "tissue", drop_facet = T) + theme(axis.text.y = element_text(size=12))
}else{
  print("No pathways found in this scenario.")
}

plot_df_indv_bas_red<-plot_df


Upregulated

Expand to see results for default GO terms.
formula_res_merge_up <- formula_res_merge %>% dplyr::filter(grepl("Upregulated", sig_reg), tissue != "Collapse")
plot_df <- generatePlotDataframe(formula_res_merge_up)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df, x = "tissue")
}else{
  print("No pathways found in this scenario.")
}


Expand to see results for reduced GO terms.
formula_res_merge_red_up <- formula_res_merge_red %>% dplyr::filter(grepl("Upregulated", sig_reg), tissue != "Collapse")
plot_df <- generatePlotDataframe(formula_res_merge_red_up)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df, x = "tissue")
}else{
  print("No pathways found in this scenario.")
}


Downregulated

Expand to see results for default GO terms.
formula_res_merge_down <- formula_res_merge %>% dplyr::filter(grepl("Downregulated", sig_reg), tissue != "Collapse")
plot_df <- generatePlotDataframe(formula_res_merge_down)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df, x = "tissue")
}else{
  print("No pathways found in this scenario.")
}


Expand to see results for reduced GO terms.
formula_res_merge_red_down <- formula_res_merge_red %>% dplyr::filter(grepl("Downregulated", sig_reg), tissue != "Collapse")
plot_df <- generatePlotDataframe(formula_res_merge_red_down)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df, x = "tissue")
}else{
  print("No pathways found in this scenario.")
}


2.3 Merge to output to csv

go_merge_no_red<-rbind(plot_df_collapse, plot_df_indv_bas) %>% arrange(p.adjust)

write_csv(x = go_merge_no_red, file = file.path(gene_enrichment_results_path, "PD_collapsed_Sig_GO_terms_no_reduction.csv"))

go_merge_red<-rbind(plot_df_collapse_red,plot_df_indv_bas_red) %>% arrange(p.adjust)

write_csv(x = go_merge_red, file = file.path(gene_enrichment_results_path, "PD_collapsed_Sig_GO_terms_merged_with_term_reduction.csv"))

3 KEGG enrichment analysis

if(!file_test("-f", formula_res_KEGG_path)){
  formula_res_KEGG <- clusterProfiler::compareCluster(
    ENTREZID ~ sig_reg + tissue + test,
    data = cluster_data_sig, fun = enrichKEGG,
    organism = "hsa",
    universe = universe_genes) %>%
    clusterProfiler::setReadable(OrgDb = org.Hs.eg.db, keyType = "ENTREZID")
  
  formula_res_KEGG@compareClusterResult %<>% dplyr::filter(Count >= count_threshold)
  
  # Save results
  formula_res_KEGG %>% saveRDS(formula_res_KEGG_path)
}else{
  formula_res_KEGG <- readRDS(formula_res_KEGG_path)
}

3.1 Collapsed brain regions

Both

formula_res_KEGG_both <- formula_res_KEGG %>% dplyr::filter(tissue == "Collapse")
plot_df <- generatePlotDataframe(formula_res_KEGG_both)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df, drop_facet = F)
}else{
  print("No pathways found in this scenario.")
}
## [1] "No pathways found in this scenario."
plot_df$tissue<-"Collapse"
kegg_plot_df_collapse<- plot_df

Upregulated

formula_res_KEGG_up <- formula_res_KEGG %>% dplyr::filter(grepl("Upregulated", sig_reg), tissue == "Collapse")
plot_df <- generatePlotDataframe(formula_res_KEGG_up)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df)
}else{
  print("No pathways found in this scenario.")
}
## [1] "No pathways found in this scenario."

Downregulated

formula_res_KEGG_down <- formula_res_KEGG %>% dplyr::filter(grepl("Downregulated", sig_reg), tissue == "Collapse")
plot_df <- generatePlotDataframe(formula_res_KEGG_down)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df)
}else{
  print("No pathways found in this scenario.")
}
## [1] "No pathways found in this scenario."

3.2 By brain regions

Both

formula_res_KEGG_both <- formula_res_KEGG %>% dplyr::filter(tissue != "Collapse")
plot_df <- generatePlotDataframe(formula_res_KEGG_both)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df, x = "tissue", drop_facet = F)
}else{
  print("No pathways found in this scenario.")
}

kegg_plot_df_indv_bas<- plot_df

Upregulated

formula_res_KEGG_up <- formula_res_KEGG %>% dplyr::filter(grepl("Upregulated", sig_reg), tissue != "Collapse")
plot_df <- generatePlotDataframe(formula_res_KEGG_up)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df, x = "tissue")
}else{
  print("No pathways found in this scenario.")
}
## [1] "No pathways found in this scenario."

Downregulated

formula_res_KEGG_down <- formula_res_KEGG %>% dplyr::filter(grepl("Downregulated", sig_reg), tissue != "Collapse")
plot_df <- generatePlotDataframe(formula_res_KEGG_down)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df, x = "tissue")
}else{
  print("No pathways found in this scenario.")
}

3.3 Merge to output to csv

kegg_merge<-rbind(kegg_plot_df_collapse, kegg_plot_df_indv_bas) %>% arrange(p.adjust)

write_csv(x = kegg_merge, file = file.path(gene_enrichment_results_path, "PD_collapsed_Sig_KEGG_terms.csv"))

4 REACTOME enrichment analysis

if(!file_test("-f", formula_res_REACTOME_path)){
  formula_res_REACTOME <- clusterProfiler::compareCluster(
    ENTREZID ~ sig_reg + tissue + test,
    data = cluster_data_sig, fun = "enrichPathway",
    readable = T)
  
  formula_res_REACTOME@compareClusterResult %<>% dplyr::filter(Count >= count_threshold)
  
  # Save results
  formula_res_REACTOME %>% saveRDS(formula_res_REACTOME_path)
}else{
  formula_res_REACTOME <- readRDS(formula_res_REACTOME_path)
}

4.1 Collapsed brain regions

Both

formula_res_REACTOME_both <- formula_res_REACTOME %>% dplyr::filter(tissue == "Collapse")
plot_df <- generatePlotDataframe(formula_res_REACTOME_both)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df, drop_facet = F)
}else{
  print("No pathways found in this scenario.")
}

plot_df$tissue<-"Collapse"
reactome_plot_df_collapse <- plot_df

Upregulated

formula_res_REACTOME_up <- formula_res_REACTOME %>% dplyr::filter(grepl("Upregulated", sig_reg), tissue == "Collapse")
plot_df <- generatePlotDataframe(formula_res_REACTOME_up)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df)
}else{
  print("No pathways found in this scenario.")
}
## [1] "No pathways found in this scenario."

Downregulated

formula_res_REACTOME_down <- formula_res_REACTOME %>% dplyr::filter(grepl("Downregulated", sig_reg), tissue == "Collapse")
plot_df <- generatePlotDataframe(formula_res_REACTOME_down)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df)
}else{
  print("No pathways found in this scenario.")
}

4.2 By brain regions

Both

formula_res_REACTOME_both <- formula_res_REACTOME %>% dplyr::filter(tissue != "Collapse")
plot_df <- generatePlotDataframe(formula_res_REACTOME_both)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df, x = "tissue", drop_facet = F)
}else{
  print("No pathways found in this scenario.")
}

reactome_plot_df_indv_bas <- plot_df

Upregulated

formula_res_REACTOME_up <- formula_res_REACTOME %>% dplyr::filter(grepl("Upregulated", sig_reg), tissue != "Collapse")
plot_df <- generatePlotDataframe(formula_res_REACTOME_up)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df, x = "tissue")
}else{
  print("No pathways found in this scenario.")
}
## [1] "No pathways found in this scenario."

Downregulated

formula_res_REACTOME_down <- formula_res_REACTOME %>% dplyr::filter(grepl("Downregulated", sig_reg), tissue != "Collapse")
plot_df <- generatePlotDataframe(formula_res_REACTOME_down)

if(nrow(plot_df) > 0){
  drawPathwaysGO(plot_df, x = "tissue")
}else{
  print("No pathways found in this scenario.")
}

4.3 Merge to output to csv

reactome_merge<-rbind(reactome_plot_df_collapse,reactome_plot_df_indv_bas) %>% arrange(p.adjust)

write_csv(x = reactome_merge, file = file.path(gene_enrichment_results_path, "PD_collapsed_Sig_REACTOME_terms.csv"))

5 Session Info

Click to Show
sessioninfo::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.3.0 (2023-04-21)
##  os       Ubuntu 20.04.6 LTS
##  system   x86_64, linux-gnu
##  ui       X11
##  language (EN)
##  collate  en_GB.UTF-8
##  ctype    en_GB.UTF-8
##  tz       Etc/UTC
##  date     2024-12-04
##  pandoc   3.1.1 @ /usr/lib/rstudio-server/bin/quarto/bin/tools/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package              * version    date (UTC) lib source
##  abind                  1.4-8      2024-09-12 [1] CRAN (R 4.3.0)
##  AnnotationDbi        * 1.64.1     2023-11-03 [1] Bioconductor
##  aod                    1.3.3      2023-12-13 [1] CRAN (R 4.3.0)
##  ape                    5.8        2024-04-11 [1] CRAN (R 4.3.0)
##  aplot                  0.2.3      2024-06-17 [1] CRAN (R 4.3.0)
##  backports              1.5.0      2024-05-23 [1] CRAN (R 4.3.0)
##  Biobase              * 2.62.0     2023-10-24 [1] Bioconductor
##  BiocGenerics         * 0.48.1     2023-11-01 [1] Bioconductor
##  BiocParallel         * 1.36.0     2023-10-24 [1] Bioconductor
##  Biostrings             2.70.3     2024-03-13 [1] Bioconductor 3.18 (R 4.3.0)
##  bit                    4.5.0      2024-09-20 [1] CRAN (R 4.3.0)
##  bit64                  4.5.2      2024-09-22 [1] CRAN (R 4.3.0)
##  bitops                 1.0-9      2024-10-03 [1] CRAN (R 4.3.0)
##  blob                   1.2.4      2023-03-17 [1] CRAN (R 4.3.0)
##  bookdown               0.41       2024-10-16 [1] CRAN (R 4.3.0)
##  boot                   1.3-31     2024-08-28 [1] CRAN (R 4.3.0)
##  broom                  1.0.7      2024-09-26 [1] CRAN (R 4.3.0)
##  bslib                  0.8.0      2024-07-29 [1] CRAN (R 4.3.0)
##  cachem                 1.1.0      2024-05-16 [1] CRAN (R 4.3.0)
##  caTools                1.18.3     2024-09-04 [1] CRAN (R 4.3.0)
##  cli                    3.6.3      2024-06-21 [1] CRAN (R 4.3.0)
##  clusterProfiler      * 4.10.1     2024-03-08 [1] Bioconductor 3.18 (R 4.3.0)
##  codetools              0.2-20     2024-03-31 [1] CRAN (R 4.3.0)
##  colorspace             2.1-1      2024-07-26 [1] CRAN (R 4.3.0)
##  corpcor                1.6.10     2021-09-16 [1] CRAN (R 4.3.0)
##  cowplot                1.1.3      2024-01-22 [1] CRAN (R 4.3.0)
##  crayon                 1.5.3      2024-06-20 [1] CRAN (R 4.3.0)
##  data.table             1.16.2     2024-10-10 [1] CRAN (R 4.3.0)
##  DBI                    1.2.3      2024-06-02 [1] CRAN (R 4.3.0)
##  DelayedArray           0.28.0     2023-10-24 [1] Bioconductor
##  DESeq2               * 1.42.1     2024-03-06 [1] Bioconductor 3.18 (R 4.3.0)
##  digest                 0.6.37     2024-08-19 [1] CRAN (R 4.3.0)
##  doParallel           * 1.0.17     2022-02-07 [1] CRAN (R 4.3.0)
##  DOSE                   3.28.2     2023-12-10 [1] Bioconductor 3.18 (R 4.3.0)
##  dplyr                * 1.1.4      2023-11-17 [1] CRAN (R 4.3.0)
##  edgeR                * 4.0.16     2024-02-18 [1] Bioconductor 3.18 (R 4.3.0)
##  enrichplot           * 1.22.0     2023-10-24 [1] Bioconductor
##  EnvStats               3.0.0      2024-08-24 [1] CRAN (R 4.3.0)
##  evaluate               1.0.1      2024-10-10 [1] CRAN (R 4.3.0)
##  fANCOVA                0.6-1      2020-11-13 [1] CRAN (R 4.3.0)
##  fansi                  1.0.6      2023-12-08 [1] CRAN (R 4.3.0)
##  farver                 2.1.2      2024-05-13 [1] CRAN (R 4.3.0)
##  fastmap                1.2.0      2024-05-15 [1] CRAN (R 4.3.0)
##  fastmatch              1.1-4      2023-08-18 [1] CRAN (R 4.3.0)
##  fgsea                * 1.28.0     2023-10-24 [1] Bioconductor
##  forcats              * 1.0.0      2023-01-29 [1] CRAN (R 4.3.0)
##  foreach              * 1.5.2      2022-02-02 [1] CRAN (R 4.3.0)
##  fs                     1.6.5      2024-10-30 [1] CRAN (R 4.3.0)
##  generics               0.1.3      2022-07-05 [1] CRAN (R 4.3.0)
##  GenomeInfoDb         * 1.38.8     2024-03-15 [1] Bioconductor 3.18 (R 4.3.0)
##  GenomeInfoDbData       1.2.11     2023-11-20 [1] Bioconductor
##  GenomicRanges        * 1.54.1     2023-10-29 [1] Bioconductor
##  ggforce                0.4.2      2024-02-19 [1] CRAN (R 4.3.0)
##  ggfun                  0.1.7      2024-10-24 [1] CRAN (R 4.3.0)
##  ggh4x                * 0.2.8      2024-01-23 [1] CRAN (R 4.3.0)
##  ggplot2              * 3.5.1      2024-04-23 [1] CRAN (R 4.3.0)
##  ggplotify              0.1.2      2023-08-09 [1] CRAN (R 4.3.0)
##  ggraph                 2.2.1      2024-03-07 [1] CRAN (R 4.3.0)
##  ggrepel                0.9.6      2024-09-07 [1] CRAN (R 4.3.0)
##  ggtree                 3.10.1     2024-02-25 [1] Bioconductor 3.18 (R 4.3.0)
##  glue                   1.8.0      2024-09-30 [1] CRAN (R 4.3.0)
##  GO.db                  3.18.0     2023-11-23 [1] Bioconductor
##  GOSemSim               2.28.1     2024-01-17 [1] Bioconductor 3.18 (R 4.3.0)
##  gplots                 3.2.0      2024-10-05 [1] CRAN (R 4.3.0)
##  graphlayouts           1.2.0      2024-09-24 [1] CRAN (R 4.3.0)
##  gridExtra              2.3        2017-09-09 [1] CRAN (R 4.3.0)
##  gridGraphics           0.5-1      2020-12-13 [1] CRAN (R 4.3.0)
##  grpSciRmdTheme       * 0.4.0.1    2023-12-03 [1] Github (guillermo1996/grpSciRmdTheme@5f7645e)
##  gson                   0.1.0      2023-03-07 [1] CRAN (R 4.3.0)
##  gtable                 0.3.6      2024-10-25 [1] CRAN (R 4.3.0)
##  gtools                 3.9.5      2023-11-20 [1] CRAN (R 4.3.0)
##  HDO.db                 0.99.1     2023-11-23 [1] Bioconductor
##  hms                    1.1.3      2023-03-21 [1] CRAN (R 4.3.0)
##  htmltools              0.5.8.1    2024-04-04 [1] CRAN (R 4.3.0)
##  httr                   1.4.7      2023-08-15 [1] CRAN (R 4.3.0)
##  igraph                 2.1.1      2024-10-19 [1] CRAN (R 4.3.0)
##  IRanges              * 2.36.0     2023-10-24 [1] Bioconductor
##  iterators            * 1.0.14     2022-02-05 [1] CRAN (R 4.3.0)
##  jquerylib              0.1.4      2021-04-26 [1] CRAN (R 4.3.0)
##  jsonlite               1.8.9      2024-09-20 [1] CRAN (R 4.3.0)
##  KEGGREST               1.42.0     2023-10-24 [1] Bioconductor
##  KernSmooth             2.23-24    2024-05-17 [1] CRAN (R 4.3.0)
##  knitr                  1.49       2024-11-08 [1] CRAN (R 4.3.0)
##  lattice                0.22-6     2024-03-20 [1] CRAN (R 4.3.0)
##  lazyeval               0.2.2      2019-03-15 [1] CRAN (R 4.3.0)
##  lifecycle              1.0.4      2023-11-07 [1] CRAN (R 4.3.0)
##  limma                * 3.58.1     2023-10-31 [1] Bioconductor
##  lme4                   1.1-35.5   2024-07-03 [1] CRAN (R 4.3.0)
##  lmerTest               3.1-3      2020-10-23 [1] CRAN (R 4.3.0)
##  locfit                 1.5-9.10   2024-06-24 [1] CRAN (R 4.3.0)
##  lubridate            * 1.9.3      2023-09-27 [1] CRAN (R 4.3.0)
##  magrittr             * 2.0.3      2022-03-30 [1] CRAN (R 4.3.0)
##  MASS                   7.3-60.0.1 2024-01-13 [1] CRAN (R 4.3.0)
##  Matrix                 1.6-5      2024-01-11 [1] CRAN (R 4.3.0)
##  MatrixGenerics       * 1.14.0     2023-10-24 [1] Bioconductor
##  matrixStats          * 1.4.1      2024-09-08 [1] CRAN (R 4.3.0)
##  memoise                2.0.1      2021-11-26 [1] CRAN (R 4.3.0)
##  minqa                  1.2.8      2024-08-17 [1] CRAN (R 4.3.0)
##  munsell                0.5.1      2024-04-01 [1] CRAN (R 4.3.0)
##  mvtnorm                1.3-2      2024-11-04 [1] CRAN (R 4.3.0)
##  nlme                   3.1-166    2024-08-14 [1] CRAN (R 4.3.0)
##  nloptr                 2.1.1      2024-06-25 [1] CRAN (R 4.3.0)
##  numDeriv               2016.8-1.1 2019-06-06 [1] CRAN (R 4.3.0)
##  org.Hs.eg.db         * 3.18.0     2023-11-23 [1] Bioconductor
##  patchwork            * 1.3.0      2024-09-16 [1] CRAN (R 4.3.0)
##  pbkrtest               0.5.3      2024-06-26 [1] CRAN (R 4.3.0)
##  pillar                 1.9.0      2023-03-22 [1] CRAN (R 4.3.0)
##  pkgconfig              2.0.3      2019-09-22 [1] CRAN (R 4.3.0)
##  plyr                   1.8.9      2023-10-02 [1] CRAN (R 4.3.0)
##  png                    0.1-8      2022-11-29 [1] CRAN (R 4.3.0)
##  polyclip               1.10-7     2024-07-23 [1] CRAN (R 4.3.0)
##  purrr                * 1.0.2      2023-08-10 [1] CRAN (R 4.3.0)
##  qvalue                 2.34.0     2023-10-24 [1] Bioconductor
##  R6                     2.5.1      2021-08-19 [1] CRAN (R 4.3.0)
##  rbibutils              2.3        2024-10-04 [1] CRAN (R 4.3.0)
##  RColorBrewer           1.1-3      2022-04-03 [1] CRAN (R 4.3.0)
##  Rcpp                   1.0.13-1   2024-11-02 [1] CRAN (R 4.3.0)
##  RCurl                  1.98-1.16  2024-07-11 [1] CRAN (R 4.3.0)
##  Rdpack                 2.6.1      2024-08-06 [1] CRAN (R 4.3.0)
##  readr                * 2.1.5      2024-01-10 [1] CRAN (R 4.3.0)
##  remaCor                0.0.18     2024-02-08 [1] CRAN (R 4.3.0)
##  reshape2               1.4.4      2020-04-09 [1] CRAN (R 4.3.0)
##  RhpcBLASctl            0.23-42    2023-02-11 [1] CRAN (R 4.3.0)
##  rlang                  1.1.4      2024-06-04 [1] CRAN (R 4.3.0)
##  rmarkdown              2.29       2024-11-04 [1] CRAN (R 4.3.0)
##  RSQLite                2.3.7      2024-05-27 [1] CRAN (R 4.3.0)
##  rstudioapi             0.17.1     2024-10-22 [1] CRAN (R 4.3.0)
##  S4Arrays               1.2.1      2024-03-04 [1] Bioconductor 3.18 (R 4.3.0)
##  S4Vectors            * 0.40.2     2023-11-23 [1] Bioconductor 3.18 (R 4.3.0)
##  sass                   0.4.9      2024-03-15 [1] CRAN (R 4.3.0)
##  scales               * 1.3.0      2023-11-28 [1] CRAN (R 4.3.0)
##  scatterpie             0.2.4      2024-08-28 [1] CRAN (R 4.3.0)
##  sessioninfo            1.2.2      2021-12-06 [1] CRAN (R 4.3.0)
##  shadowtext             0.1.4      2024-07-18 [1] CRAN (R 4.3.0)
##  SparseArray            1.2.4      2024-02-11 [1] Bioconductor 3.18 (R 4.3.0)
##  statmod                1.5.0      2023-01-06 [1] CRAN (R 4.3.0)
##  stringi                1.8.4      2024-05-06 [1] CRAN (R 4.3.0)
##  stringr              * 1.5.1      2023-11-14 [1] CRAN (R 4.3.0)
##  SummarizedExperiment * 1.32.0     2023-10-24 [1] Bioconductor
##  tibble               * 3.2.1      2023-03-20 [1] CRAN (R 4.3.0)
##  tidygraph              1.3.1      2024-01-30 [1] CRAN (R 4.3.0)
##  tidyr                * 1.3.1      2024-01-24 [1] CRAN (R 4.3.0)
##  tidyselect             1.2.1      2024-03-11 [1] CRAN (R 4.3.0)
##  tidytree               0.4.6      2023-12-12 [1] CRAN (R 4.3.0)
##  tidyverse            * 2.0.0      2023-02-22 [1] CRAN (R 4.3.0)
##  timechange             0.3.0      2024-01-18 [1] CRAN (R 4.3.0)
##  treeio                 1.26.0     2023-10-24 [1] Bioconductor
##  tweenr                 2.0.3      2024-02-26 [1] CRAN (R 4.3.0)
##  tzdb                   0.4.0      2023-05-12 [1] CRAN (R 4.3.0)
##  utf8                   1.2.4      2023-10-22 [1] CRAN (R 4.3.0)
##  variancePartition    * 1.32.5     2024-02-16 [1] Bioconductor 3.18 (R 4.3.0)
##  vctrs                  0.6.5      2023-12-01 [1] CRAN (R 4.3.0)
##  viridis                0.6.5      2024-01-29 [1] CRAN (R 4.3.0)
##  viridisLite            0.4.2      2023-05-02 [1] CRAN (R 4.3.0)
##  vroom                  1.6.5      2023-12-05 [1] CRAN (R 4.3.0)
##  withr                  3.0.2      2024-10-28 [1] CRAN (R 4.3.0)
##  xfun                   0.49       2024-10-31 [1] CRAN (R 4.3.0)
##  XVector                0.42.0     2023-10-24 [1] Bioconductor
##  yaml                   2.3.10     2024-07-26 [1] CRAN (R 4.3.0)
##  yulab.utils            0.1.8      2024-11-07 [1] CRAN (R 4.3.0)
##  zlibbioc               1.48.2     2024-03-13 [1] Bioconductor 3.18 (R 4.3.0)
## 
##  [1] /opt/R/4.3.0/lib/R/library
## 
## ──────────────────────────────────────────────────────────────────────────────